Opis zestawu danych
Wczytanie danych
Wczytanie danych z pliku i nazwanie kolumn.
library(tidyverse)
## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.0 ──
## ✓ ggplot2 3.3.3 ✓ purrr 0.3.4
## ✓ tibble 3.0.5 ✓ dplyr 1.0.3
## ✓ tidyr 1.1.2 ✓ stringr 1.4.0
## ✓ readr 1.4.0 ✓ forcats 0.5.0
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
drugs <- read.delim("drug_consumption.data", header = FALSE, sep = ",", fill = TRUE)
#str(data)
colnames(drugs) <- c(
"id", #1
"age", #2
"gender", #3
"education", #4
"country", #5
"ethnicity", #6
"nscore", #7
"escore", #8
"oscore", #9
"ascore", #10
"cscore", #11
"impulsive", #12
"sensationSeeing", #13
"alcohol", #14
"amphetamine", #15
"amyl", #16
"benzos", #17
"caffeine", #18
"cannabis", #19
"chocolate", #20
"cocaine", #21
"crack", #22
"ecstasy", #23
"heroin", #24
"ketamine", #25
"legalHighs", #26
"lsd", #27
"meth", #28
"mushrooms", #29
"nicotine", #30
"semer", #31
"vsa" #32
)
# cast values to characters to avoid negative indexing
drugs[ ,c(2:11)] <- lapply(drugs[ ,c(2:11)], as.character)
#usage
usage <- c(
"Never Used",
"Used over a Decade Ago",
"Used in Last Decade",
"Used in Last Year",
"Used in Last Month",
"Used in Last Week",
"Used in Last Day"
)
names(usage) <- c("CL0", "CL1", "CL2", "CL3", "CL4", "CL5", "CL6")
usage
## CL0 CL1 CL2
## "Never Used" "Used over a Decade Ago" "Used in Last Decade"
## CL3 CL4 CL5
## "Used in Last Year" "Used in Last Month" "Used in Last Week"
## CL6
## "Used in Last Day"
for (i in 14:32) {
tmp <- usage[drugs[ ,i]]
drugs[ ,i] <- as.factor(tmp)
levels(drugs[ ,i]) <- usage
}
# age
ageLevels <- c("18-24","25-34","35-44","45-54","55-64","65+")
names(ageLevels) <- c(
-0.95197, # 18-24
-0.07854, # 25-34
0.49788, # 35-44
1.09449, # 45-45
1.82213, # 55-64
2.59171 # 65+
)
tmp <- ageLevels[drugs$age]
drugs$age <- as.factor(tmp)
levels(drugs$age) <- ageLevels
# gender
genderLevels <- c("M", "F")
names(genderLevels) <- c(-0.48246, 0.48246)
tmp <- genderLevels[drugs$gender]
drugs$gender <- as.factor(tmp)
#education
educationLevels <- c(
"Left school before 16 years",
"Left school at 16 years",
"Left school at 17 years",
"Left school at 18 years",
"Some college, no degree",
"Professional certificate",
"University degree",
"Masters degree",
"Doctorate degree"
)
names(educationLevels) <- c(
-2.43591, # Left school before 16 years
-1.73790, # Left school at 16 years
-1.43719, # Left school at 17 years
-1.22751, # Left school at 18 years
-0.61113, # Some college, no degree
-0.05921, # Professional certificate
0.45468, # University degree
1.16365, # Masters degree
1.98437 # Doctorate degree
)
tmp <- educationLevels[drugs$education]
drugs$education <- as.factor(tmp)
levels(drugs$education) <- educationLevels
#country
countryLevels <- c("Australia", "Canada", "New Zealand", "Other",
"Ireland", "UK", "USA")
names(countryLevels) <- c(
-0.09765, # Australia
0.24923, # Canada
-0.46841, # New Zealand
-0.28519, # Other
0.21128, # Ireland
0.96082, # UK
-0.57009 # USA
)
tmp <- countryLevels[drugs$country]
drugs$country <- as.factor(tmp)
#ethnicity
ethnicityLevels <- c("Asian", "Black", "Mixed-Black/Asian", "Mixed-White/Asian",
"Mixed-White/Black", "Other", "White")
names(ethnicityLevels) <- c(
-0.50212, # Asian
-1.10702, # Black
1.90725, # Mixed-Black/Asian
0.12600, # Mixed-White/Asian
-0.22166, # Mixed-White/Black
0.11440, # Other
-0.31685 # White
)
tmp <- ethnicityLevels[drugs$ethnicity]
drugs$ethnicity <- as.factor(tmp)
# NEO PI-R score levels
nscoreLevels <- c(12:60)
escoreLevels <- c(16,18:59)
oscoreLevels <- c(24,26,28:60)
ascoreLevels <- c(12,16,18,23:60)
cscoreLevels <- c(17,19:57,59)
#nscores
scores <- drugs %>%
distinct(nscore) %>%
arrange(nscore) %>%
pull(nscore)
names(nscoreLevels) <- scores
tmp <- nscoreLevels[drugs$nscore]
drugs$nscore <- as.factor(tmp)
#escores
scores <- drugs %>%
distinct(escore) %>%
arrange(escore) %>%
pull(escore)
names(escoreLevels) <- scores
tmp <- escoreLevels[drugs$escore]
drugs$escore <- as.factor(tmp)
#oscores
scores <- drugs %>%
distinct(oscore) %>%
arrange(oscore) %>%
pull(oscore)
names(oscoreLevels) <- scores
tmp <- oscoreLevels[drugs$oscore]
drugs$oscore <- as.factor(tmp)
#ascores
scores <- drugs %>%
distinct(ascore) %>%
arrange(ascore) %>%
pull(ascore)
names(ascoreLevels) <- scores
tmp <- ascoreLevels[drugs$ascore]
drugs$ascore <- as.factor(tmp)
#cscores
scores <- drugs %>%
distinct(cscore) %>%
arrange(cscore) %>%
pull(cscore)
names(cscoreLevels) <- scores
tmp <- cscoreLevels[drugs$cscore]
drugs$cscore <- as.factor(tmp)
# come back to numeric values to allow quantitative analysis
drugs[ ,c(7:11)] <- lapply(drugs[ ,c(7:11)], as.numeric)
if (sum(is.na(drugs)) > 0) {
drop_na(drugs)
} else {
print("No missing values")
}
## [1] "No missing values"
summary(drugs)
## id age gender education
## Min. : 1.0 18-24:643 F:942 Masters degree :506
## 1st Qu.: 474.0 25-34:481 M:943 Doctorate degree :480
## Median : 946.0 35-44:356 Professional certificate:283
## Mean : 945.3 45-54:294 University degree :270
## 3rd Qu.:1417.0 55-64: 93 Left school at 18 years :100
## Max. :1888.0 65+ : 18 Left school at 16 years : 99
## (Other) :147
## country ethnicity nscore escore
## Australia : 54 Asian : 26 Min. : 1.00 Min. : 1.00
## Canada : 87 Black : 33 1st Qu.: 7.00 1st Qu.: 6.00
## Ireland : 20 Mixed-Black/Asian: 3 Median :25.00 Median :24.00
## New Zealand: 5 Mixed-White/Asian: 20 Mean :20.09 Mean :18.14
## Other : 118 Mixed-White/Black: 20 3rd Qu.:31.00 3rd Qu.:28.00
## UK :1044 Other : 63 Max. :49.00 Max. :42.00
## USA : 557 White :1720
## oscore ascore cscore impulsive
## Min. : 1.00 Min. : 1.00 Min. : 1 Min. :-2.555240
## 1st Qu.: 5.00 1st Qu.: 5.00 1st Qu.: 6 1st Qu.:-0.711260
## Median :14.00 Median :15.00 Median :16 Median :-0.217120
## Mean :15.73 Mean :17.18 Mean :18 Mean : 0.007216
## 3rd Qu.:26.00 3rd Qu.:29.00 3rd Qu.:29 3rd Qu.: 0.529750
## Max. :35.00 Max. :41.00 Max. :41 Max. : 2.901610
##
## sensationSeeing alcohol amphetamine
## Min. :-2.078480 Never Used : 34 Never Used :976
## 1st Qu.:-0.525930 Used over a Decade Ago:505 Used over a Decade Ago:102
## Median : 0.079870 Used in Last Decade : 68 Used in Last Decade :243
## Mean :-0.003292 Used in Last Year :287 Used in Last Year : 75
## 3rd Qu.: 0.765400 Used in Last Month :759 Used in Last Month : 61
## Max. : 1.921730 Used in Last Week :198 Used in Last Week :198
## Used in Last Day : 34 Used in Last Day :230
## amyl benzos
## Never Used :1305 Never Used :1000
## Used over a Decade Ago: 3 Used over a Decade Ago: 95
## Used in Last Decade : 237 Used in Last Decade : 234
## Used in Last Year : 24 Used in Last Year : 120
## Used in Last Month : 14 Used in Last Month : 84
## Used in Last Week : 92 Used in Last Week : 236
## Used in Last Day : 210 Used in Last Day : 116
## caffeine cannabis
## Never Used : 27 Never Used :413
## Used over a Decade Ago:1385 Used over a Decade Ago:463
## Used in Last Decade : 24 Used in Last Decade :266
## Used in Last Year : 106 Used in Last Year :140
## Used in Last Month : 273 Used in Last Month :185
## Used in Last Week : 60 Used in Last Week :211
## Used in Last Day : 10 Used in Last Day :207
## chocolate cocaine
## Never Used : 32 Never Used :1038
## Used over a Decade Ago:807 Used over a Decade Ago: 19
## Used in Last Decade : 10 Used in Last Decade : 270
## Used in Last Year :296 Used in Last Year : 99
## Used in Last Month :683 Used in Last Month : 41
## Used in Last Week : 54 Used in Last Week : 258
## Used in Last Day : 3 Used in Last Day : 160
## crack ecstasy
## Never Used :1627 Never Used :1021
## Used over a Decade Ago: 2 Used over a Decade Ago: 21
## Used in Last Decade : 112 Used in Last Decade : 234
## Used in Last Year : 9 Used in Last Year : 156
## Used in Last Month : 9 Used in Last Month : 63
## Used in Last Week : 59 Used in Last Week : 277
## Used in Last Day : 67 Used in Last Day : 113
## heroin ketamine
## Never Used :1605 Never Used :1490
## Used over a Decade Ago: 13 Used over a Decade Ago: 4
## Used in Last Decade : 94 Used in Last Decade : 142
## Used in Last Year : 24 Used in Last Year : 42
## Used in Last Month : 16 Used in Last Month : 33
## Used in Last Week : 65 Used in Last Week : 129
## Used in Last Day : 68 Used in Last Day : 45
## legalHighs lsd
## Never Used :1094 Never Used :1069
## Used over a Decade Ago: 67 Used over a Decade Ago: 13
## Used in Last Decade : 198 Used in Last Decade : 177
## Used in Last Year : 110 Used in Last Year : 97
## Used in Last Month : 64 Used in Last Month : 56
## Used in Last Week : 323 Used in Last Week : 214
## Used in Last Day : 29 Used in Last Day : 259
## meth mushrooms
## Never Used :1429 Never Used :982
## Used over a Decade Ago: 73 Used over a Decade Ago: 4
## Used in Last Decade : 97 Used in Last Decade :260
## Used in Last Year : 50 Used in Last Year :115
## Used in Last Month : 48 Used in Last Month : 40
## Used in Last Week : 149 Used in Last Week :275
## Used in Last Day : 39 Used in Last Day :209
## nicotine semer
## Never Used :428 Never Used :1877
## Used over a Decade Ago:610 Used over a Decade Ago: 3
## Used in Last Decade :204 Used in Last Decade : 1
## Used in Last Year :108 Used in Last Year : 2
## Used in Last Month :157 Used in Last Month : 2
## Used in Last Week :185 Used in Last Week : 0
## Used in Last Day :193 Used in Last Day : 0
## vsa
## Never Used :1455
## Used over a Decade Ago: 7
## Used in Last Decade : 135
## Used in Last Year : 13
## Used in Last Month : 14
## Used in Last Week : 61
## Used in Last Day : 200
write.csv(drugs, "Żarnowski_dane_przekształcone.csv", row.names = FALSE)
saveRDS(drugs, "drug_consumption_data.Rda")
WIZUALIZACJE
Ostatnie użycie wybranych substancji uzależniających z uwzględnieniem płci i wieku (ggplot2).
library(ggplot2)
library(grid)
library(gridExtra)
##
## Attaching package: 'gridExtra'
## The following object is masked from 'package:dplyr':
##
## combine
plot1 <- ggplot(data = drugs, aes(x = cannabis, fill = age)) +
geom_bar() +
theme_bw() +
facet_wrap(~ gender) +
ggtitle("cannabis") +
theme(plot.title = element_text(hjust = 0.5),
axis.title.x=element_blank(),
axis.title.y=element_blank(),
legend.position = "none") +
coord_flip()
plot2 <- ggplot(data = drugs, aes(x = amphetamine, fill = age)) +
geom_bar() +
theme_bw() +
facet_wrap(~ gender) +
ggtitle("amphetamine") +
theme(plot.title = element_text(hjust = 0.5),
axis.title.x=element_blank(),
axis.title.y=element_blank(),
axis.text.y=element_blank(),
axis.ticks.y = element_blank(),
legend.position = "right") +
coord_flip()
plot3 <- ggplot(data = drugs, aes(x = nicotine, fill = age)) +
geom_bar() +
theme_bw() +
facet_wrap(~ gender) +
ggtitle("nicotine") +
theme(plot.title = element_text(hjust = 0.5),
axis.title.x=element_blank(),
axis.ticks.y = element_blank(),
axis.title.y=element_blank(),
legend.position = "none") +
coord_flip()
plot4 <- ggplot(data = drugs, aes(x = ecstasy, fill = age)) +
geom_bar() +
theme_bw() +
facet_wrap(~ gender) +
ggtitle("ecstasy") +
theme(plot.title = element_text(hjust = 0.5),
axis.title.x=element_blank(),
axis.title.y=element_blank(),
axis.ticks.y = element_blank(),
axis.text.y=element_blank()) +
coord_flip()
grid.arrange(plot1,plot2,plot3,plot4, ncol = 2, nrow = 2,
top = "Some drugs usage broken down by gender and age",
left = "Usage",
bottom = "Number of respondens")
Badanie wpływu aktywnego używania wybranych narkotyków na zmianę profilu osobowości względem osoby nieużywającej danej substancji (graphics).
Funkcje filtrujące dane dla użykowników i abstynentów danej substancji.
users <- function(substance) {
drugs %>%
filter(
substance == "Used in Last Day"
| substance == "Used in Last Week"
| substance == "Used in Last Month"
| substance == "Used in Last Year"
)
}
nonusers <- function(substance) {
drugs %>%
filter(
substance == "Never Used"
| substance == "Used over a Decade Ago"
| substance == "Used in Last Decade"
)
}
neoac <- function(substance, active) {
if(active) {
df = users(substance)
} else {
df = nonusers(substance)
}
var <- df %>%
summarise(
n = mean(nscore),
e = mean(escore),
o = mean(oscore),
a = mean(ascore),
c = mean(cscore))
var <- as.numeric(var[1, ])
names(var) <- c("n", "e", "o", "a", "c")
return(var)
}
library(graphics)
par(mfrow=c(2,2))
# cocaine
cocaine_yes <- neoac(drugs$cocaine, T)
cocaine_no <- neoac(drugs$cocaine, F)
scores_range <- range(12, cocaine_yes, cocaine_no, 25)
plot(cocaine_yes,
type="b",
col="red",
pch = 19,
ylim=scores_range,
axes=FALSE,
ann=FALSE)
axis(1, at=1:5, lab=c("n", "e", "o", "a", "c"))
axis(2, las=1, at=12:scores_range[2])
lines(cocaine_no, type="b", pch=15, lty=2, col="blue")
title(main = "Cocaine",
xlab = "NEO PI-R personality dimensions",
ylab = "Average score")
legend(2.5, scores_range[2], c("non-user","user"), cex=0.7,
col=c("blue","red"), pch=15:19, lty=2:1)
box()
# ecstasy
ecstasy_yes <- neoac(drugs$ecstasy, T)
ecstasy_no <- neoac(drugs$ecstasy, F)
scores_range <- range(12, ecstasy_yes, ecstasy_no, 25)
plot(ecstasy_yes,
type="b",
col="red",
pch = 19,
ylim=scores_range,
axes=FALSE,
ann=FALSE)
axis(1, at=1:5, lab=c("n", "e", "o", "a", "c"))
axis(2, las=1, at=12:scores_range[2])
lines(ecstasy_no, type="b", pch=15, lty=2, col="blue")
title(main = "Ecstasy",
xlab = "NEO PI-R personality dimensions",
ylab = "Average score")
legend(2.5, scores_range[2], c("non-user","user"), cex=0.7,
col=c("blue","red"), pch=15:19, lty=2:1)
box()
# meth
meth_yes <- neoac(drugs$meth, T)
meth_no <- neoac(drugs$meth, F)
scores_range <- range(12, meth_yes, meth_no, 25)
plot(meth_yes,
type="b",
col="red",
pch = 19,
ylim=scores_range,
axes=FALSE,
ann=FALSE)
axis(1, at=1:5, lab=c("n", "e", "o", "a", "c"))
axis(2, las=1, at=12:scores_range[2])
lines(meth_no, type="b", pch=15, lty=2, col="blue")
title(main = "Meth",
xlab = "NEO PI-R personality dimensions",
ylab = "Average score")
legend(2.5, scores_range[2], c("non-user","user"), cex=0.7,
col=c("blue","red"), pch=15:19, lty=2:1)
box()
# crack
crack_yes <- neoac(drugs$meth, T)
crack_no <- neoac(drugs$meth, F)
scores_range <- range(12, crack_yes, crack_no, 25)
plot(crack_yes,
type="b",
col="red",
pch = 19,
ylim=scores_range,
axes=FALSE,
ann=FALSE)
axis(1, at=1:5, lab=c("n", "e", "o", "a", "c"))
axis(2, las=1, at=12:scores_range[2])
lines(crack_no, type="b", pch=15, lty=2, col="blue")
title(main = "Crack",
xlab = "NEO PI-R personality dimensions",
ylab = "Average score")
legend(2.5, scores_range[2], c("non-user","user"), cex=0.7,
col=c("blue","red"), pch=15:19, lty=2:1)
box()
mtext("Average personality profiles for users and non-users of some drugs",
side = 3, line = -1.2, outer = TRUE)
par(mfrow=c(1,1))
Badanie zależności pomiędzy używaniem substancji uzależniających w młodości a ryzykiem aktywnego używania obecnie dla osób w wieku 18-34 (lattice).
Przefiltrowanie danych dla wybranych substancji uzależniających i osób w wieku 18-34.
sample_drugs <- c(21:29)
youth <- drugs %>%
filter(age == "18-24" | age == "25-34") %>%
select(all_of(sample_drugs))
df <- count(youth, cocaine)
df <- subset(df, select = n)
rownames(df) <- usage
df <- rename(df, cocaine = n )
df <- mutate(df, crack = count(youth, crack)$n)
df <- mutate(df, ecstasy = count(youth, ecstasy)$n)
df <- mutate(df, heroin = count(youth, heroin)$n)
df <- mutate(df, ketamine = count(youth, ketamine)$n)
df <- mutate(df, legalHighs = count(youth, legalHighs)$n)
df <- mutate(df, lsd = count(youth, lsd)$n)
df <- mutate(df, meth = count(youth, meth)$n)
df <- mutate(df, mushrooms = count(youth, mushrooms)$n)
users <- df %>%
slice(4:7) %>%
summarise_all(sum)
past_users <- df %>%
slice(2:3) %>%
summarise_all(sum)
non_users <- df[1, ]
df <- bind_rows(users, past_users, df[1, ])
df <- as.data.frame(t(df))
df <- add_column(df, .before = 1, drugs = colnames(drugs[,21:29]))
rownames(df) <- c(1:9)
colnames(df) <- c("Substance", "ActiveUsers", "PastUsers", "NonUsers")
df <- mutate(df, EverUsed = ActiveUsers + PastUsers)
df <- mutate(df, activeUsersRatio = ActiveUsers/EverUsed)
df$activeUsersRatio <- round(df$activeUsersRatio, digits = 2)
df[ ,c(1,6)]
library(lattice)
library(latticeExtra)
##
## Attaching package: 'latticeExtra'
## The following object is masked from 'package:ggplot2':
##
## layer
xyplot(ActiveUsers ~ EverUsed, data = df,
main = "Correlation between early use of addictive substance in youth
\nand currently active use",
xlab = "Respondents who tried drugs in the past",
ylab = "Active users",
col = "chocolate",
pch = 19,
fontsize = 15,
panel = function(...) {
panel.fill(col = "gray4")
panel.xyplot(...)
})
library(plotly)
##
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
##
## last_plot
## The following object is masked from 'package:stats':
##
## filter
## The following object is masked from 'package:graphics':
##
## layout
plot_ly(data = df,
x = ~Substance,
y = ~ActiveUsers,
type = "bar",
name = "Active Users",
marker = list(color = 'rgb(50,125,190)')) %>%
add_trace(y = ~PastUsers, name = "Past Users", marker = list(color = 'rgb(200,200,200)')) %>%
layout(yaxis = list(title = "Users"))
illegal <- c(15,19,21:25,27:29,31)
illegal_count <- rowSums(drugs[ ,illegal] != "Never Used", na.rm = TRUE)
drugs <- mutate(drugs, illegal_active = illegal_count)
ggplot(data = drugs, aes(x = illegal_active, y = education)) +
geom_count(aes(size = ..n..), color='royalblue3', shape = 15) +
ggtitle("Correlation between illegal drugs usage and education") +
labs(x = "Number of illegal drugs ever used", y = "Education level") +
scale_size("Group", range = c(1,8))